## Code for "The Determinants of Innovation Adoption Across the Policy Diffusion Life Course"
## Daniel J. Mallinson
## Diffusion Stages Script
## 2014-17-04
## 2018-02-05
## 2019-02-21
## 2019-06-05
## 2019-07-19
## 2019-08-07
## 2020-01-06
## 2020-03-14
## 2020-04-01

rm(list = ls(all = TRUE))

########################################################################
################## Load packages #######################################
########################################################################

install.packages(c("psych", "lme4", "reshape", "ggplot2", "gridExtra", "RColorBrewer", "scales", "MCMCpack", "merTools", "interactions", "sjPlot", "broom.mixed", "EnvStats"))

library(foreign)
library(psych)
library(lme4)
library(reshape)
library(plyr)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
library(scales)
library(MCMCpack)
library(lmtest)
library(splines)
library(merTools)
library(interactions)
library(sjPlot)
library(broom.mixed)
library(EnvStats)

########################################################################
################## Load data     #######################################
########################################################################

all.data <- read.csv("stages_psj_reproduction_data.csv")
data <- all.data

write.csv(data, "stages_psj_reproduction_data.csv")

#############################################################################
########################### Create Diffusion Stages #########################
#############################################################################

unique.policies <- as.matrix(unique(data$policy)) #numeric list of included policies

data$standard.time <- NA
data$time_count <- data$time_count + 1 #Can't have time values of 0 for elnormcensored

newdata <- as.data.frame(matrix(NA, nrow=1, ncol=ncol(data)+1)) #Create blank dataset for appending early adopters
names(newdata) <- c(colnames(data), "stage")

## With time count starting at 1
#Standardize time span of adoptions and sort based on Rogers (1958) categories
for(i in 1:nrow(unique.policies)){
	use.policy <- unique.policies[i,]
	use.data <- data[which(data$policy==use.policy),]
	use.data$stage <- NA
	time <- max(use.data$time_count)
	if(time > 5 & sum(use.data$adopt[use.data$adopt==1])>2){ #excludes policies with t<4 because categories are no longer mutually exclusive, also excludes policies with no more than 2 adoptions, as this makes the mle inestimable
	adopts <- use.data[which(use.data$adopt==1),]
	adopts <- adopts[order(adopts$year),]
	#adopts <- adopts[c("statenam", "year","policy", "time","adopt")]
	first.years <- unique(adopts$year)[1:2]
	first.years <- seq(first.years[1], first.years[2], 1)
	for(j in 1:length(first.years)){
	  use.data$stage[use.data$year==first.years[j]] <- 1
	}
	if(max(first.years)+1 > max(use.data$year)){
	  newdata <- rbind(newdata,use.data)
	}else{
	years.left <- seq(max(first.years)+1, max(use.data$year),1)
	for(k in 1:length(years.left)){
	  year.obs <- use.data[which(use.data$year==years.left[k]-1),]
	  year.obs <- rbind(year.obs, adopts[which(adopts$year<years.left[k]),])
	  year.obs$censor <- 0
	  year.obs$censor[year.obs$adopt==0] <- 1
	  #if(sum(year.obs$censor)==0){
	   # use.data$stage[use.data$year==years.left[k]] <- 5
	  #}else{
	  logtime <- elnormCensored(year.obs$time_count,year.obs$censor,censoring.side="right", ) 
	  mu <- logtime$parameters[1]
	  sig <- logtime$parameters[2]
	  #mean.est <- exp(mu+sig^2/2)
	  #sd.est <- sqrt((exp(sig^2)-1)*exp(2*mu+sig^2))
	  pctile <- plnorm(max(year.obs$time_count),mu,sig)*100
	  if(pctile<=2.5){
	    use.data$stage[use.data$year==years.left[k]] <- 1
	  }else if(pctile > 2.5 & pctile <= 16){
	    use.data$stage[use.data$year==years.left[k]] <- 2
	  } else if(pctile > 16 & pctile <= 50){
	    use.data$stage[use.data$year==years.left[k]] <- 3
	  } else if(pctile > 50 & pctile <= 84){
	    use.data$stage[use.data$year==years.left[k]] <- 4
	  } else if(pctile > 84){
	    use.data$stage[use.data$year==years.left[k]] <- 5
	  }
	  #}
	}
	}
	}
	newdata <- rbind(newdata,use.data)
} 

#Delete first row
newdata <- newdata[-1,]

#Check distribution of stages
table(newdata$stage)

data <- newdata

##############################################################################
############################## Table S1 ######################################
##############################################################################

#remove polarization and divided government for accurate descriptives (not in stages analysis)
myvars <- names(data) %in% c("h_diffs", "s_diffs", "avg_diffs", "divided_gov")
data <- data[!myvars]

data <- data[which(data$year<2017),]

describe.vars <- data[c("neighbor_per", "ideology_relative_hm", "congress_majortopic", "percap_log", "population_log", "legprof_squire", "citi6016", "init_avail", "init_qual", "mip", "complexity_topic", "nyt")]

describe (describe.vars[which(data$stage==1),], skew=FALSE, ranges=FALSE, na.rm=TRUE) #Innovator Stage
describe (describe.vars[which(data$stage==2),], skew=FALSE, ranges=FALSE, na.rm=TRUE) #Early Adopter Stage
describe (describe.vars[which(data$stage==3),], skew=FALSE, ranges=FALSE, na.rm=TRUE) #Early Majority Stage
describe (describe.vars[which(data$stage==4),], skew=FALSE, ranges=FALSE, na.rm=TRUE) #Late Majority Stage
describe (describe.vars[which(data$stage==5),], skew=FALSE, ranges=FALSE, na.rm=TRUE) #Laggard Stage

##############################################################################
#################### Rescale Variables #######################################
##############################################################################

scalevars <- c("neighbor_per","ideology_relative_hm","citi6016","percap_log","population_log",
                             "legprof_squire","mip","congress_majortopic", "nyt", "time")
data[scalevars] <- scale(data[scalevars])

##############################################################################
############################### Figure 1  ####################################
##############################################################################

#pdf("adopter_categories.pdf")
jpeg("figure1.jpg", height=4, width=11, units="in", res=1000)
set.seed(3000)
x <- seq(-3,3, length=10000)
y <- dnorm(x, mean=0, 1)
cumulative <- pnorm(x,0,1)
par(mar=c(5,1,1,1))
layout(matrix(c(1,1,1,2,2), 1, 5, byrow=TRUE), widths=c(3,2))
plot(x,y, type="l", lwd=1.5, xlim=c(-3,3), axes=FALSE, xlab="", ylab="")
title(main="", xlab="Time", ylab="", sub="(a)")
segments(-1.96,-1, -1.96, 0.058, lwd=1.5)
segments(-.995, -1, -.995, .243, lwd=1.5)
segments(0,-1,0,0.398, lwd=1.5)
segments(.995,-1,.995,.243, lwd=1.5)
ax1name <- c("", bquote(paste(bar(x), " - 2sd")), bquote(paste(bar(x), " - sd")), expression(bar(x)), bquote(paste(bar(x), " + sd")), "")
axis(1, at = c(-3, -1.96, -.995, 0, .995, 3), labels = ax1name, tck=0)
text(-2.5,.05, c("Innovators \n 2.5%"),pos=3, offset=0, cex=1)
text(-1.45, 0, c("Early Adopters \n 13.5%"), pos=3, offset=0, cex=1)
text(-0.5, 0, c("Early Majority \n 34%"), pos=3, offset=0, cex=1)
text(0.5, 0, c("Late Majority \n 34%"), pos=3, offset=0, cex=1)
text(1.6, 0, c("Laggards \n 16%"), pos=3, offset=0, cex=1)
arrows(-2.3,.05,-2.2,0, code=2, lwd=1.5)
curve(pnorm(x,0,1),xlim=c(-3,3), xlab="", ylab="", axes=FALSE)
title(main="", xlab="Time", ylab="Cumulative Adoptions", sub="(b)")
cord.x <- c(-3,seq(-3,-2,0.01),-2)
cord.y <- c(0,pnorm(seq(-3,-2,0.01)),0)
polygon(cord.x,cord.y,col="gray70")
cord.x <- c(-2,seq(-2,-1,0.01),-1)
cord.y <- c(0,pnorm(seq(-2,-1,0.01)),0)
polygon(cord.x,cord.y,col="gray70")
cord.x <- c(-1,seq(-1,0,0.01),0)
cord.y <- c(0,pnorm(seq(-1,0,0.01)),0)
polygon(cord.x,cord.y,col="gray70")
cord.x <- c(0,seq(0,1,0.01),1)
cord.y <- c(0,pnorm(seq(0,1,0.01)),0)
polygon(cord.x,cord.y,col="gray70")
cord.x <- c(1,seq(1,3,0.01),3)
cord.y <- c(0,pnorm(seq(1,3,0.01)),0)
polygon(cord.x,cord.y,col="gray70")
ax1name <- c("", bquote(paste(bar(x), " - 2sd")), bquote(paste(bar(x), " - sd")), expression(bar(x)), bquote(paste(bar(x), " + sd")), "")
axis(1, at = c(-3, -1.96, -.995, 0, .995, 3), labels = ax1name, tck=0)
axis(2, at=c(0,0.2,0.4,0.6,0.8,1.0), labels=c(0,0.2,0.4,0.6,0.8,1.0), las=2)
dev.off()

#Guidance: http://www.statmethods.net/advgraphs/probability.html

##############################################################################
############################ Figure 2  #######################################
##############################################################################

adopts <- data[which(data$adopt==1),]
adopts.tab <- prop.table(table(adopts$adopt, adopts$stage))*100

#pdf("adopts_barplot.pdf", height=5, width=8)
jpeg("figure2.jpg", height=5, width=8, units="in", res=800)
par(mar=c(2,4,1,1))
adopt.bar <- barplot(adopts.tab, ylim=c(0,43), axes=FALSE, names.arg=c("Innovator", "Early Adopt", "Early Majority", "Late Majority", "Laggard"), ylab="Percentage of Adoptions", cex.names=0.8)
text(adopt.bar, adopts.tab+1, round(adopts.tab,1), cex=0.8)
axis(2, at=c(0,10,20,30,40), c(0,10,20,30,40), las=2, cex=0.8)
dev.off()

##############################################################################
######################### Functional Form ####################################
#########################     Table S2    ####################################
##############################################################################

## Checking functional form assumption
model.logit <- glm(adopt ~ 
                        + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + time, family=binomial(link="logit"), data=data)
summary(model.logit)
round(coef(model.logit),3)
round(sqrt(diag(vcov(model.logit))),3)

model.probit <- glm(adopt ~ 
                       + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + time, family=binomial(link="probit"), data=data)
summary(model.probit)
round(coef(model.probit),3)
round(sqrt(diag(vcov(model.probit))),3)

lrtest(model.logit, model.probit)

model.cloglog <- glm(adopt ~ 
                        + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + time, family=binomial(link="cloglog"), data=data)
summary(model.cloglog)
round(coef(model.cloglog),3)
round(sqrt(diag(vcov(model.cloglog))),3)

lrtest(model.logit, model.cloglog)

#N
nobs(model.logit) #338076


##############################################################################
######################### Duration Dependence ################################
#########################      Table S 3      ################################
##############################################################################

model.time <- glm(adopt ~ 
                      + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + time, family=binomial(link="logit"), data=data)
summary(model.time)
round(coef(model.time),3)
round(sqrt(diag(vcov(model.time))),3)

model.logtime <- glm(adopt ~ 
                        + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + time_log, family=binomial(link="logit"), data=data)

summary(model.logtime)
round(coef(model.logtime),3)
round(sqrt(diag(vcov(model.logtime))),3)

lrtest(model.time, model.logtime)

model.polytime <- glm(adopt ~ 
                        + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + poly(time,3), family=binomial(link="logit"), data=data)
summary(model.polytime)
round(coef(model.polytime),3)
round(sqrt(diag(vcov(model.polytime))),3)

lrtest(model.time, model.polytime)

model.splinetime <- glm(adopt ~ 
                        + neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
                      + nyt + ns(time, df=3), family=binomial(link="logit"), data=data)
summary(model.splinetime)
round(coef(model.splinetime),3)
round(sqrt(diag(vcov(model.splinetime))),3)

lrtest(model.time, model.splinetime)

##############################################################################
########################### Random Effects ###################################
##############################################################################

## Examine which random effect structure best fits the data using guidance from 
## Field, Andy, Jeremy Miles, and Zoe Field. 2012.  Discovering Statistics Using R. SAGE. 

interceptonly <- glm(adopt~1, family=binomial(link="logit"), data=data)

policyintercept <- glmer(adopt~ 1|policy, family=binomial(link="logit"), data=data)

bothintercept <- glmer(adopt~ (1|policy) + (1|state), family=binomial(link="logit"), data=data)
reint <- REsim(bothintercept)
plotREsim(reint)

anova(policyintercept, bothintercept, interceptonly) ## both intercepts model is the best fit

##############################################################################
########################### Data Analysis ####################################
##############################################################################

## Base model with no interactions included
model.base <- glmer(adopt ~ 
                      neighbor_per + ideology_relative_hm + congress_majortopic
                      + init_avail + init_qual + legprof_squire + citi6016 
                      + percap_log + population_log
                      + mip + complexity_topic + mip*complexity_topic
			  + nyt + time + (1|policy) + (1|state), 
                      control=glmerControl(optCtrl=list("optim", maxfun=5e5)), family=binomial(link="logit"), data=data)

ss <- getME(model.base,c("theta","fixef"))
model.base <- update(model.base,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))
summary(model.base) 

## Model in Table 3

model.interact <- glmer(adopt ~ 
				neighbor_per*stage + ideology_relative_hm*stage
				+ congress_majortopic*stage + init_avail + init_qual + legprof_squire*stage + citi6016 
				+ percap_log*stage + population_log*stage
				+ mip + complexity_topic + nyt + time + 
				+ (1|policy), 
                      control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=5e5)), family=binomial(link="logit"), data=data)

#This step may need to be repeated several times to achieve convergence
ss <- getME(model.interact,c("theta","fixef"))
model.interact <- update(model.interact,start=ss,control=glmerControl(optimizer="Nelder_Mead", optCtrl=list(maxfun=2e9)))
summary(model.interact)
nobs(model.interact)

ors <- tidy(model.interact, conf.int=TRUE,exponentiate=TRUE,effects="fixed")
ors %>% print(n=nrow(ors))

## Model in Table S4 (Bowen and Greene Professionalism)

model.interact.be <- glmer(adopt ~ 
				neighbor_per*stage + ideology_relative_hm*stage
				+ congress_majortopic*stage + init_avail + init_qual + mds1*stage + mds2*stage + citi6016 
				+ percap_log*stage + population_log*stage
				+ mip + complexity_topic + nyt + time + 
				+ (1|policy), 
                      control=glmerControl(optimizer="optimx", optCtrl=list(maxfun=5e5, method="nlminb")), family=binomial(link="logit"), data=data)

#This step may need to be repeated several times to achieve convergence
ss <- getME(model.interact.be,c("theta","fixef"))
model.interact <- update(model.interact.be,start=ss,control=glmerControl(optimizer="Nelder_Mead", optCtrl=list(maxfun=2e9)), nAGQ=1)
summary(model.interact.be)
nobs(model.interact.be)

ors <- tidy(model.interact.be, conf.int=TRUE,exponentiate=TRUE,effects="fixed")
ors %>% print(n=nrow(ors))

interplot(m=model.interact.be, var1="mds1", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Legislative Professionalism") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

##############################################################################
######################### Figures 3-5 ########################################
##############################################################################

a <- interplot(m=model.interact, var1="neighbor_per", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Neighbor Adoptions") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

b <- interplot(m=model.interact, var1="ideology_relative_hm", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Ideological Distance") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

c <- interplot(m=model.interact, var1="congress_majortopic", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Congressional Hearings") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

d <- interplot(m=model.interact, var1="legprof_squire", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Legislative Professionalism") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

e <- interplot(m=model.interact, var1="percap_log", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Per Capita Income") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")

f <- interplot(m=model.interact, var1="population_log", var2="stage", hist=FALSE, adjCI=TRUE, point=TRUE) +
  xlab("Diffusion Stage") + 
  ylab("Marginal Effect of Population") + 
  theme_bw() + 
  geom_hline(yintercept=0, linetype="solid")


g <- interact_plot(model.interact, pred="neighbor_per", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Proportion of Neighbor Adoptions (Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)

h <- interact_plot(model.interact, pred="ideology_relative_hm", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Relative Ideology (Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)

i <- interact_plot(model.interact, pred="congress_majortopic", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Congressional Hearings (Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)

j <- interact_plot(model.interact, pred="legprof_squire", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Legislative Professionalism (Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)

k <- interact_plot(model.interact, pred="percap_log", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Per Capita Income (Logged and Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)

l <- interact_plot(model.interact, pred="population_log", modx="stage", modx.values=c(1,2,3,4,5),
	interval=TRUE, int.width=.95, color.class="Set1", x.label="Population (Logged and Standardized)",
	y.label="Probability of Adoption", adjCI=TRUE)


jpeg("figure3.jpg", width=10, height=8, units="in", res=600)
grid.arrange(a,b,g,h,nrow=2)
dev.off()

jpeg("figure4.jpg", width=10, height=8, units="in", res=600)
grid.arrange(c,d,i,j,nrow=2)
dev.off()

jpeg("figure5.jpg", width=10, height=8, units="in", res=600)
grid.arrange(e,f,k,l,nrow=2)
dev.off()
